Setup and data loading

library(tidyverse)
library(ggplot2)
library(dplyr)
library(readr)
library(ggthemes)
library(caret)
library(randomForest)
library(pROC)
library(car)
library(broom)
library(reshape2)
library(plotly)
library(patchwork) 
library(tidyr)
library(corrplot)
library(glmnet)
library(randomForest)
# Load the 2019 Food Access Research Atlas extract. The path is relative to
# the directory this document is rendered from.
raw_df <- read_csv("../dataset/FoodAccessResearchAtlasData2019.csv")

str(raw_df)

# Treat the literal string "NULL" in character columns as a missing value so
# it is counted by is.na() below.
df_fixed <- raw_df %>%
  mutate(across(where(is.character), ~ na_if(., "NULL")))

# Fraction of missing values per column; keep columns with at most 10% missing.
missing_pct <- colMeans(is.na(df_fixed))
df_clean <- df_fixed[, missing_pct <= 0.10]

str(df_clean)

# The first three columns are carried through unchanged; only the remaining
# columns are type-converted.
df_first3 <- df_clean %>% select(1:3)
df_rest <- df_clean %>% select(-(1:3))

# Flag columns whose non-missing values are exactly the two values 0 and 1.
# vapply() always returns a logical vector (sapply() would return an empty
# list when there are no columns, which breaks the indexing below).
binary_cols <- vapply(
  df_rest,
  function(x) {
    x_unique <- unique(na.omit(x))
    length(x_unique) == 2 && all(x_unique %in% c(0, 1))
  },
  logical(1)
)

binary_names <- names(df_rest)[binary_cols]
other_names <- names(df_rest)[!binary_cols]

# 0/1 columns become factors; every other column is coerced to numeric.
# all_of() makes it explicit that the names come from an external character
# vector and errors if any of them is missing from the data.
df_rest_clean <- df_rest %>%
  mutate(across(all_of(binary_names), as.factor)) %>%
  mutate(across(all_of(other_names), as.numeric))

df <- bind_cols(df_first3, df_rest_clean)

str(df)
# SMART Q1
## Where are food deserts most geographically concentrated across the U.S., and how do these concentrations differ between urban and rural census tracts?
According to USDA definitions, a “food desert” is typically a low-income tract that also has low access to supermarkets based on established distance criteria. Here, we use the `LILATracts_1And10` column (which applies a 1-mile threshold for urban tracts and a 10-mile threshold for rural tracts) as the indicator, and we assume that a value of 1 in `LILATracts_1And10` means the tract qualifies as a food desert.
``` r
# Numeric 0/1 food-desert flag. Going through as.character() first means that,
# if LILATracts_1And10 is a factor, we recover its labels ("0"/"1") rather
# than its integer level codes.
df$FoodDesert <- as.numeric(as.character(df$LILATracts_1And10))

# Number of food-desert tracts per State x Urban combination.
state_urban_counts <- df %>%
  group_by(State, Urban) %>%
  summarise(FoodDesert = sum(FoodDesert, na.rm = TRUE), .groups = "drop")

# One row per state. pivot_wider() names the new columns after the values of
# Urban ("0" and "1"), so they must be quoted with backticks when renamed.
food_desert_pivot <- state_urban_counts %>%
  pivot_wider(
    names_from = Urban,
    values_from = FoodDesert,
    values_fill = 0
  ) %>%
  rename(
    `Rural Food Desert Count` = `0`,
    `Urban Food Desert Count` = `1`
  ) %>%
  mutate(
    `Total Food Desert Count` =
      `Rural Food Desert Count` + `Urban Food Desert Count`
  ) %>%
  arrange(desc(`Total Food Desert Count`))

head(food_desert_pivot, 10)
```
``` r
# Total number of food-desert tracts per state, largest first.
state_totals <- df %>%
  group_by(State) %>%
  summarise(
    TotalFoodDeserts = sum(FoodDesert, na.rm = TRUE)
  ) %>%
  arrange(desc(TotalFoodDeserts))

state_totals_top10 <- head(state_totals, 10)

# Horizontal bar chart; reorder() sorts the bars by count so the largest
# state ends up at the top after coord_flip().
ggplot(
  state_totals_top10,
  aes(x = reorder(State, TotalFoodDeserts), y = TotalFoodDeserts)
) +
  geom_bar(stat = "identity", fill = "coral") +
  coord_flip() +
  labs(
    title = "Top 10 States by Number of Food Deserts",
    x = "State",
    y = "Number of Food Deserts"
  ) +
  theme_minimal(base_size = 14)
```
{=html} <div class="plotly html-widget html-fill-item" id="htmlwidget-77aedf29bf94bfee3c50" style="width:672px;height:480px;"></div> <script type="application/json" data-for="htmlwidget-77aedf29bf94bfee3c50">{"x":{"visdat":{"6c2c70ab37c3":["function () ","plotlyVisDat"]},"cur_data":"6c2c70ab37c3","attrs":{"6c2c70ab37c3":{"alpha_stroke":1,"sizes":[10,100],"spans":[1,20],"type":"choropleth","locations":["AL","AK","AZ","AR","CA","CO","CT","DE",null,"FL","GA","HI","ID","IL","IN","IA","KS","KY","LA","ME","MD","MA","MI","MN","MS","MO","MT","NE","NV","NH","NJ","NM","NY","NC","ND","OH","OK","OR","PA","RI","SC","SD","TN","TX","UT","VT","VA","WA","WV","WI","WY"],"locationmode":"USA-states","z":[22.665534804753818,19.760479041916167,16.907894736842106,24.927113702623906,6.6799601196410769,13.92914653784219,7.85024154589372,14.953271028037383,6.7039106145251397,13.151602104256337,22.534491568727645,9.9688473520249214,13.758389261744966,10.240770465489566,19.309887193098874,10.328068043742405,18.14621409921671,13.783783783783784,22.852081488042515,8.5470085470085468,9.4244604316546763,7.6346284935241995,12.30043541364296,14.392803598200899,31.562974203338392,17.828900071890725,13.284132841328413,10.338345864661655,7.2058823529411766,13.013698630136986,5.3946053946053949,25.301204819277107,3.9835728952772076,16.229885057471265,8.2926829268292686,14.305130818892287,16.921606118546844,12.106537530266344,7.4143302180685362,5.394190871369295,19.981668194317141,14.414414414414415,17.864338482202822,19.511263841160748,8.3760683760683765,6.557377049180328,14.262990455991517,12.041522491349481,13.636363636363637,9.5545977011494259,9.1603053435114496],"colorscale":"Reds","colorbar":{"title":"Food Desert %"},"text":["Alabama <br> Food Desert %: 22.7 %","Alaska <br> Food Desert %: 19.8 %","Arizona <br> Food Desert %: 16.9 %","Arkansas <br> Food Desert %: 24.9 %","California <br> Food Desert %: 6.7 %","Colorado <br> Food Desert %: 13.9 %","Connecticut <br> Food Desert %: 7.9 %","Delaware 
<br> Food Desert %: 15 %","District of Columbia <br> Food Desert %: 6.7 %","Florida <br> Food Desert %: 13.2 %","Georgia <br> Food Desert %: 22.5 %","Hawaii <br> Food Desert %: 10 %","Idaho <br> Food Desert %: 13.8 %","Illinois <br> Food Desert %: 10.2 %","Indiana <br> Food Desert %: 19.3 %","Iowa <br> Food Desert %: 10.3 %","Kansas <br> Food Desert %: 18.1 %","Kentucky <br> Food Desert %: 13.8 %","Louisiana <br> Food Desert %: 22.9 %","Maine <br> Food Desert %: 8.5 %","Maryland <br> Food Desert %: 9.4 %","Massachusetts <br> Food Desert %: 7.6 %","Michigan <br> Food Desert %: 12.3 %","Minnesota <br> Food Desert %: 14.4 %","Mississippi <br> Food Desert %: 31.6 %","Missouri <br> Food Desert %: 17.8 %","Montana <br> Food Desert %: 13.3 %","Nebraska <br> Food Desert %: 10.3 %","Nevada <br> Food Desert %: 7.2 %","New Hampshire <br> Food Desert %: 13 %","New Jersey <br> Food Desert %: 5.4 %","New Mexico <br> Food Desert %: 25.3 %","New York <br> Food Desert %: 4 %","North Carolina <br> Food Desert %: 16.2 %","North Dakota <br> Food Desert %: 8.3 %","Ohio <br> Food Desert %: 14.3 %","Oklahoma <br> Food Desert %: 16.9 %","Oregon <br> Food Desert %: 12.1 %","Pennsylvania <br> Food Desert %: 7.4 %","Rhode Island <br> Food Desert %: 5.4 %","South Carolina <br> Food Desert %: 20 %","South Dakota <br> Food Desert %: 14.4 %","Tennessee <br> Food Desert %: 17.9 %","Texas <br> Food Desert %: 19.5 %","Utah <br> Food Desert %: 8.4 %","Vermont <br> Food Desert %: 6.6 %","Virginia <br> Food Desert %: 14.3 %","Washington <br> Food Desert %: 12 %","West Virginia <br> Food Desert %: 13.6 %","Wisconsin <br> Food Desert %: 9.6 %","Wyoming <br> Food Desert %: 9.2 
%"],"hoverinfo":"text","inherit":true},"6c2c70ab37c3.1":{"alpha_stroke":1,"sizes":[10,100],"spans":[1,20],"type":"scattergeo","lat":[32.5901,49.25,34.219200000000001,34.733600000000003,36.534100000000002,38.677700000000002,41.592799999999997,38.677700000000002,null,27.874400000000001,32.332900000000002,31.75,43.564799999999998,40.049500000000002,40.049500000000002,41.9358,38.420400000000001,37.391500000000001,30.618099999999998,45.622599999999998,39.277799999999999,42.3645,43.136099999999999,46.394300000000001,32.675800000000002,38.334699999999998,46.823,41.335599999999999,39.106299999999997,43.3934,39.963700000000003,34.476399999999998,43.136099999999999,35.419499999999999,47.2517,40.220999999999997,35.505299999999998,43.907800000000002,40.9069,41.592799999999997,33.619,44.336500000000001,35.676699999999997,31.389700000000001,39.106299999999997,44.250799999999998,37.563000000000002,47.423099999999998,38.420400000000001,44.593699999999998,43.050400000000003],"lon":[-86.750900000000001,-127.25,-111.625,-92.299199999999999,-119.773,-105.51300000000001,-72.357299999999995,-74.984099999999998,null,-81.685000000000002,-83.373599999999996,-126.25,-113.93000000000001,-89.377600000000001,-86.080799999999996,-93.371399999999994,-98.115600000000001,-84.767399999999995,-92.272400000000005,-68.980099999999993,-76.645899999999997,-71.579999999999998,-84.686999999999998,-94.604299999999995,-89.8065,-92.5137,-109.31999999999999,-99.589799999999997,-116.851,-71.392399999999995,-74.233599999999996,-105.94199999999999,-75.144900000000007,-78.468599999999995,-100.099,-82.596299999999999,-97.123900000000006,-120.068,-77.450000000000003,-71.124399999999994,-80.505600000000001,-99.723799999999997,-86.456000000000003,-98.785700000000006,-111.33,-72.545000000000002,-78.200500000000005,-119.746,-80.666499999999999,-89.994100000000003,-107.256],"text":["23%","20%","17%","25%","7%","14%","8%","15%","7%","13%","23%","10%","14%","10%","19%","10%","18%","14%","23%","9%","9%","8%","12%","14%","32
%","18%","13%","10%","7%","13%","5%","25%","4%","16%","8%","14%","17%","12%","7%","5%","20%","14%","18%","20%","8%","7%","14%","12%","14%","10%","9%"],"mode":"text","textfont":{"color":"black","size":9},"showlegend":false,"inherit":true}},"layout":{"margin":{"b":40,"l":60,"t":25,"r":10},"title":"Percentage of Food Desert Tracts by State (2019)","geo":{"scope":"usa","projection":{"type":"albers usa"},"showlakes":false,"lakecolor":"white"},"scene":{"zaxis":{"title":[]}},"hovermode":"closest","showlegend":false,"legend":{"yanchor":"top","y":0.5}},"source":"A","config":{"modeBarButtonsToAdd":["hoverclosest","hovercompare"],"showSendToCloud":false},"data":[{"colorbar":{"title":"Food Desert %","ticklen":2,"len":0.5,"lenmode":"fraction","y":1,"yanchor":"top"},"colorscale":"Reds","showscale":true,"type":"choropleth","locations":["AL","AK","AZ","AR","CA","CO","CT","DE",null,"FL","GA","HI","ID","IL","IN","IA","KS","KY","LA","ME","MD","MA","MI","MN","MS","MO","MT","NE","NV","NH","NJ","NM","NY","NC","ND","OH","OK","OR","PA","RI","SC","SD","TN","TX","UT","VT","VA","WA","WV","WI","WY"],"locationmode":"USA-states","z":[22.665534804753818,19.760479041916167,16.907894736842106,24.927113702623906,6.6799601196410769,13.92914653784219,7.85024154589372,14.953271028037383,6.7039106145251397,13.151602104256337,22.534491568727645,9.9688473520249214,13.758389261744966,10.240770465489566,19.309887193098874,10.328068043742405,18.14621409921671,13.783783783783784,22.852081488042515,8.5470085470085468,9.4244604316546763,7.6346284935241995,12.30043541364296,14.392803598200899,31.562974203338392,17.828900071890725,13.284132841328413,10.338345864661655,7.2058823529411766,13.013698630136986,5.3946053946053949,25.301204819277107,3.9835728952772076,16.229885057471265,8.2926829268292686,14.305130818892287,16.921606118546844,12.106537530266344,7.4143302180685362,5.394190871369295,19.981668194317141,14.414414414414415,17.864338482202822,19.511263841160748,8.3760683760683765,6.557377049180328,14.26299045
5991517,12.041522491349481,13.636363636363637,9.5545977011494259,9.1603053435114496],"text":["Alabama <br> Food Desert %: 22.7 %","Alaska <br> Food Desert %: 19.8 %","Arizona <br> Food Desert %: 16.9 %","Arkansas <br> Food Desert %: 24.9 %","California <br> Food Desert %: 6.7 %","Colorado <br> Food Desert %: 13.9 %","Connecticut <br> Food Desert %: 7.9 %","Delaware <br> Food Desert %: 15 %","District of Columbia <br> Food Desert %: 6.7 %","Florida <br> Food Desert %: 13.2 %","Georgia <br> Food Desert %: 22.5 %","Hawaii <br> Food Desert %: 10 %","Idaho <br> Food Desert %: 13.8 %","Illinois <br> Food Desert %: 10.2 %","Indiana <br> Food Desert %: 19.3 %","Iowa <br> Food Desert %: 10.3 %","Kansas <br> Food Desert %: 18.1 %","Kentucky <br> Food Desert %: 13.8 %","Louisiana <br> Food Desert %: 22.9 %","Maine <br> Food Desert %: 8.5 %","Maryland <br> Food Desert %: 9.4 %","Massachusetts <br> Food Desert %: 7.6 %","Michigan <br> Food Desert %: 12.3 %","Minnesota <br> Food Desert %: 14.4 %","Mississippi <br> Food Desert %: 31.6 %","Missouri <br> Food Desert %: 17.8 %","Montana <br> Food Desert %: 13.3 %","Nebraska <br> Food Desert %: 10.3 %","Nevada <br> Food Desert %: 7.2 %","New Hampshire <br> Food Desert %: 13 %","New Jersey <br> Food Desert %: 5.4 %","New Mexico <br> Food Desert %: 25.3 %","New York <br> Food Desert %: 4 %","North Carolina <br> Food Desert %: 16.2 %","North Dakota <br> Food Desert %: 8.3 %","Ohio <br> Food Desert %: 14.3 %","Oklahoma <br> Food Desert %: 16.9 %","Oregon <br> Food Desert %: 12.1 %","Pennsylvania <br> Food Desert %: 7.4 %","Rhode Island <br> Food Desert %: 5.4 %","South Carolina <br> Food Desert %: 20 %","South Dakota <br> Food Desert %: 14.4 %","Tennessee <br> Food Desert %: 17.9 %","Texas <br> Food Desert %: 19.5 %","Utah <br> Food Desert %: 8.4 %","Vermont <br> Food Desert %: 6.6 %","Virginia <br> Food Desert %: 14.3 %","Washington <br> Food Desert %: 12 %","West Virginia <br> Food Desert %: 13.6 %","Wisconsin <br> Food Desert %: 9.6 
%","Wyoming <br> Food Desert %: 9.2 %"],"hoverinfo":["text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text"],"marker":{"line":{"color":"rgba(31,119,180,1)"}},"frame":null},{"type":"scattergeo","lat":[32.5901,49.25,34.219200000000001,34.733600000000003,36.534100000000002,38.677700000000002,41.592799999999997,38.677700000000002,null,27.874400000000001,32.332900000000002,31.75,43.564799999999998,40.049500000000002,40.049500000000002,41.9358,38.420400000000001,37.391500000000001,30.618099999999998,45.622599999999998,39.277799999999999,42.3645,43.136099999999999,46.394300000000001,32.675800000000002,38.334699999999998,46.823,41.335599999999999,39.106299999999997,43.3934,39.963700000000003,34.476399999999998,43.136099999999999,35.419499999999999,47.2517,40.220999999999997,35.505299999999998,43.907800000000002,40.9069,41.592799999999997,33.619,44.336500000000001,35.676699999999997,31.389700000000001,39.106299999999997,44.250799999999998,37.563000000000002,47.423099999999998,38.420400000000001,44.593699999999998,43.050400000000003],"lon":[-86.750900000000001,-127.25,-111.625,-92.299199999999999,-119.773,-105.51300000000001,-72.357299999999995,-74.984099999999998,null,-81.685000000000002,-83.373599999999996,-126.25,-113.93000000000001,-89.377600000000001,-86.080799999999996,-93.371399999999994,-98.115600000000001,-84.767399999999995,-92.272400000000005,-68.980099999999993,-76.645899999999997,-71.579999999999998,-84.686999999999998,-94.604299999999995,-89.8065,-92.5137,-109.31999999999999,-99.589799999999997,-116.851,-71.392399999999995,-74.233599999999996,-105.94199999999999,-75.144900000000007,-78.468599999999995,-100.099,-82.596299999999999,-97.123900000000006,-120.068,-77.450
000000000003,-71.124399999999994,-80.505600000000001,-99.723799999999997,-86.456000000000003,-98.785700000000006,-111.33,-72.545000000000002,-78.200500000000005,-119.746,-80.666499999999999,-89.994100000000003,-107.256],"text":["23%","20%","17%","25%","7%","14%","8%","15%","7%","13%","23%","10%","14%","10%","19%","10%","18%","14%","23%","9%","9%","8%","12%","14%","32%","18%","13%","10%","7%","13%","5%","25%","4%","16%","8%","14%","17%","12%","7%","5%","20%","14%","18%","20%","8%","7%","14%","12%","14%","10%","9%"],"mode":"text","textfont":{"color":"black","size":9},"showlegend":false,"marker":{"color":"rgba(255,127,14,1)","line":{"color":"rgba(255,127,14,1)"}},"line":{"color":"rgba(255,127,14,1)"},"frame":null}],"highlight":{"on":"plotly_click","persistent":false,"dynamic":false,"selectize":false,"opacityDim":0.20000000000000001,"selected":{"opacity":1},"debounce":0},"shinyEvents":["plotly_hover","plotly_click","plotly_selected","plotly_relayout","plotly_brushed","plotly_brushing","plotly_clickannotation","plotly_doubleclick","plotly_deselect","plotly_afterplot","plotly_sunburstclick"],"base_url":"https://plot.ly"},"evals":[],"jsHooks":[]}</script>
{=html} <div class="plotly html-widget html-fill-item" id="htmlwidget-0e7d12cc2449483e1f08" style="width:672px;height:480px;"></div> <script type="application/json" data-for="htmlwidget-0e7d12cc2449483e1f08">{"x":{"visdat":{"6c2c431a4039":["function () ","plotlyVisDat"]},"cur_data":"6c2c431a4039","attrs":{"6c2c431a4039":{"alpha_stroke":1,"sizes":[10,100],"spans":[1,20],"type":"choropleth","locations":["AL","AK","AZ","AR","CA","CO","CT","DE",null,"FL","GA","HI","ID","IL","IN","IA","KS","KY","LA","ME","MD","MA","MI","MN","MS","MO","MT","NE","NV","NH","NJ","NM","NY","NC","ND","OH","OK","OR","PA","RI","SC","SD","TN","TX","UT","VT","VA","WA","WV","WI","WY"],"locationmode":"USA-states","z":[32.453416149068325,10.714285714285714,14.783281733746129,36.71641791044776,6.5551475588195292,13.613101330603889,9.2067988668555234,18.823529411764707,6.7039106145251397,14.061207609594707,29.719484457922668,9.2592592592592595,15.882352941176471,10.746951219512194,27.4209012464046,16.775599128540303,23.622047244094489,21.50537634408602,27.083333333333332,16.037735849056602,10.753598645215918,8.4346504559270521,15.595416043846537,14.66372657111356,42.249240121580549,21.05263157894737,12.727272727272727,12.285714285714286,6.7323481116584567,21.428571428571427,5.777537796976242,24.127906976744185,4.0630975143403445,23.851851851851851,9.1954022988505741,17.783618046430135,20.623145400593472,12.562814070351759,9.1056910569105689,5.9907834101382491,27.938931297709924,19.35483870967742,26.457399103139014,22.4365234375,7.6305220883534135,14,15.631848064280497,12.042818911685995,22.119815668202765,11.326860841423947,11.594202898550725],"colorscale":"Blues","colorbar":{"title":"Urban Food Desert %"},"zmin":0,"zmax":42.249240121580549,"text":["Alabama <br> Urban Desert %: 32.5 %","Alaska <br> Urban Desert %: 10.7 %","Arizona <br> Urban Desert %: 14.8 %","Arkansas <br> Urban Desert %: 36.7 %","California <br> Urban Desert %: 6.6 %","Colorado <br> Urban Desert %: 13.6 %","Connecticut <br> Urban 
Desert %: 9.2 %","Delaware <br> Urban Desert %: 18.8 %","District of Columbia <br> Urban Desert %: 6.7 %","Florida <br> Urban Desert %: 14.1 %","Georgia <br> Urban Desert %: 29.7 %","Hawaii <br> Urban Desert %: 9.3 %","Idaho <br> Urban Desert %: 15.9 %","Illinois <br> Urban Desert %: 10.7 %","Indiana <br> Urban Desert %: 27.4 %","Iowa <br> Urban Desert %: 16.8 %","Kansas <br> Urban Desert %: 23.6 %","Kentucky <br> Urban Desert %: 21.5 %","Louisiana <br> Urban Desert %: 27.1 %","Maine <br> Urban Desert %: 16 %","Maryland <br> Urban Desert %: 10.8 %","Massachusetts <br> Urban Desert %: 8.4 %","Michigan <br> Urban Desert %: 15.6 %","Minnesota <br> Urban Desert %: 14.7 %","Mississippi <br> Urban Desert %: 42.2 %","Missouri <br> Urban Desert %: 21.1 %","Montana <br> Urban Desert %: 12.7 %","Nebraska <br> Urban Desert %: 12.3 %","Nevada <br> Urban Desert %: 6.7 %","New Hampshire <br> Urban Desert %: 21.4 %","New Jersey <br> Urban Desert %: 5.8 %","New Mexico <br> Urban Desert %: 24.1 %","New York <br> Urban Desert %: 4.1 %","North Carolina <br> Urban Desert %: 23.9 %","North Dakota <br> Urban Desert %: 9.2 %","Ohio <br> Urban Desert %: 17.8 %","Oklahoma <br> Urban Desert %: 20.6 %","Oregon <br> Urban Desert %: 12.6 %","Pennsylvania <br> Urban Desert %: 9.1 %","Rhode Island <br> Urban Desert %: 6 %","South Carolina <br> Urban Desert %: 27.9 %","South Dakota <br> Urban Desert %: 19.4 %","Tennessee <br> Urban Desert %: 26.5 %","Texas <br> Urban Desert %: 22.4 %","Utah <br> Urban Desert %: 7.6 %","Vermont <br> Urban Desert %: 14 %","Virginia <br> Urban Desert %: 15.6 %","Washington <br> Urban Desert %: 12 %","West Virginia <br> Urban Desert %: 22.1 %","Wisconsin <br> Urban Desert %: 11.3 %","Wyoming <br> Urban Desert %: 11.6 
%"],"hoverinfo":"text","inherit":true},"6c2c431a4039.1":{"alpha_stroke":1,"sizes":[10,100],"spans":[1,20],"type":"scattergeo","lat":[32.5901,49.25,34.219200000000001,34.733600000000003,36.534100000000002,38.677700000000002,41.592799999999997,38.677700000000002,null,27.874400000000001,32.332900000000002,31.75,43.564799999999998,40.049500000000002,40.049500000000002,41.9358,38.420400000000001,37.391500000000001,30.618099999999998,45.622599999999998,39.277799999999999,42.3645,43.136099999999999,46.394300000000001,32.675800000000002,38.334699999999998,46.823,41.335599999999999,39.106299999999997,43.3934,39.963700000000003,34.476399999999998,43.136099999999999,35.419499999999999,47.2517,40.220999999999997,35.505299999999998,43.907800000000002,40.9069,41.592799999999997,33.619,44.336500000000001,35.676699999999997,31.389700000000001,39.106299999999997,44.250799999999998,37.563000000000002,47.423099999999998,38.420400000000001,44.593699999999998,43.050400000000003],"lon":[-86.750900000000001,-127.25,-111.625,-92.299199999999999,-119.773,-105.51300000000001,-72.357299999999995,-74.984099999999998,null,-81.685000000000002,-83.373599999999996,-126.25,-113.93000000000001,-89.377600000000001,-86.080799999999996,-93.371399999999994,-98.115600000000001,-84.767399999999995,-92.272400000000005,-68.980099999999993,-76.645899999999997,-71.579999999999998,-84.686999999999998,-94.604299999999995,-89.8065,-92.5137,-109.31999999999999,-99.589799999999997,-116.851,-71.392399999999995,-74.233599999999996,-105.94199999999999,-75.144900000000007,-78.468599999999995,-100.099,-82.596299999999999,-97.123900000000006,-120.068,-77.450000000000003,-71.124399999999994,-80.505600000000001,-99.723799999999997,-86.456000000000003,-98.785700000000006,-111.33,-72.545000000000002,-78.200500000000005,-119.746,-80.666499999999999,-89.994100000000003,-107.256],"text":["32%","11%","15%","37%","7%","14%","9%","19%","7%","14%","30%","9%","16%","11%","27%","17%","24%","22%","27%","16%","11%","8%","16%","15%","4
2%","21%","13%","12%","7%","21%","6%","24%","4%","24%","9%","18%","21%","13%","9%","6%","28%","19%","26%","22%","8%","14%","16%","12%","22%","11%","12%"],"mode":"text","textfont":{"color":"black","size":9},"showlegend":false,"inherit":true}},"layout":{"margin":{"b":40,"l":60,"t":25,"r":10},"title":"Urban Food Desert % by US State","geo":{"scope":"usa","projection":{"type":"albers usa"},"showlakes":true,"lakecolor":"white"},"scene":{"zaxis":{"title":[]}},"hovermode":"closest","showlegend":false,"legend":{"yanchor":"top","y":0.5}},"source":"A","config":{"modeBarButtonsToAdd":["hoverclosest","hovercompare"],"showSendToCloud":false},"data":[{"colorbar":{"title":"Urban Food Desert %","ticklen":2,"len":0.5,"lenmode":"fraction","y":1,"yanchor":"top"},"colorscale":"Blues","showscale":true,"type":"choropleth","locations":["AL","AK","AZ","AR","CA","CO","CT","DE",null,"FL","GA","HI","ID","IL","IN","IA","KS","KY","LA","ME","MD","MA","MI","MN","MS","MO","MT","NE","NV","NH","NJ","NM","NY","NC","ND","OH","OK","OR","PA","RI","SC","SD","TN","TX","UT","VT","VA","WA","WV","WI","WY"],"locationmode":"USA-states","z":[32.453416149068325,10.714285714285714,14.783281733746129,36.71641791044776,6.5551475588195292,13.613101330603889,9.2067988668555234,18.823529411764707,6.7039106145251397,14.061207609594707,29.719484457922668,9.2592592592592595,15.882352941176471,10.746951219512194,27.4209012464046,16.775599128540303,23.622047244094489,21.50537634408602,27.083333333333332,16.037735849056602,10.753598645215918,8.4346504559270521,15.595416043846537,14.66372657111356,42.249240121580549,21.05263157894737,12.727272727272727,12.285714285714286,6.7323481116584567,21.428571428571427,5.777537796976242,24.127906976744185,4.0630975143403445,23.851851851851851,9.1954022988505741,17.783618046430135,20.623145400593472,12.562814070351759,9.1056910569105689,5.9907834101382491,27.938931297709924,19.35483870967742,26.457399103139014,22.4365234375,7.6305220883534135,14,15.631848064280497,12.042818911685995,22.
119815668202765,11.326860841423947,11.594202898550725],"zmin":0,"zmax":42.249240121580549,"text":["Alabama <br> Urban Desert %: 32.5 %","Alaska <br> Urban Desert %: 10.7 %","Arizona <br> Urban Desert %: 14.8 %","Arkansas <br> Urban Desert %: 36.7 %","California <br> Urban Desert %: 6.6 %","Colorado <br> Urban Desert %: 13.6 %","Connecticut <br> Urban Desert %: 9.2 %","Delaware <br> Urban Desert %: 18.8 %","District of Columbia <br> Urban Desert %: 6.7 %","Florida <br> Urban Desert %: 14.1 %","Georgia <br> Urban Desert %: 29.7 %","Hawaii <br> Urban Desert %: 9.3 %","Idaho <br> Urban Desert %: 15.9 %","Illinois <br> Urban Desert %: 10.7 %","Indiana <br> Urban Desert %: 27.4 %","Iowa <br> Urban Desert %: 16.8 %","Kansas <br> Urban Desert %: 23.6 %","Kentucky <br> Urban Desert %: 21.5 %","Louisiana <br> Urban Desert %: 27.1 %","Maine <br> Urban Desert %: 16 %","Maryland <br> Urban Desert %: 10.8 %","Massachusetts <br> Urban Desert %: 8.4 %","Michigan <br> Urban Desert %: 15.6 %","Minnesota <br> Urban Desert %: 14.7 %","Mississippi <br> Urban Desert %: 42.2 %","Missouri <br> Urban Desert %: 21.1 %","Montana <br> Urban Desert %: 12.7 %","Nebraska <br> Urban Desert %: 12.3 %","Nevada <br> Urban Desert %: 6.7 %","New Hampshire <br> Urban Desert %: 21.4 %","New Jersey <br> Urban Desert %: 5.8 %","New Mexico <br> Urban Desert %: 24.1 %","New York <br> Urban Desert %: 4.1 %","North Carolina <br> Urban Desert %: 23.9 %","North Dakota <br> Urban Desert %: 9.2 %","Ohio <br> Urban Desert %: 17.8 %","Oklahoma <br> Urban Desert %: 20.6 %","Oregon <br> Urban Desert %: 12.6 %","Pennsylvania <br> Urban Desert %: 9.1 %","Rhode Island <br> Urban Desert %: 6 %","South Carolina <br> Urban Desert %: 27.9 %","South Dakota <br> Urban Desert %: 19.4 %","Tennessee <br> Urban Desert %: 26.5 %","Texas <br> Urban Desert %: 22.4 %","Utah <br> Urban Desert %: 7.6 %","Vermont <br> Urban Desert %: 14 %","Virginia <br> Urban Desert %: 15.6 %","Washington <br> Urban Desert %: 12 %","West Virginia <br> 
Urban Desert %: 22.1 %","Wisconsin <br> Urban Desert %: 11.3 %","Wyoming <br> Urban Desert %: 11.6 %"],"hoverinfo":["text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text"],"marker":{"line":{"color":"rgba(31,119,180,1)"}},"frame":null},{"type":"scattergeo","lat":[32.5901,49.25,34.219200000000001,34.733600000000003,36.534100000000002,38.677700000000002,41.592799999999997,38.677700000000002,null,27.874400000000001,32.332900000000002,31.75,43.564799999999998,40.049500000000002,40.049500000000002,41.9358,38.420400000000001,37.391500000000001,30.618099999999998,45.622599999999998,39.277799999999999,42.3645,43.136099999999999,46.394300000000001,32.675800000000002,38.334699999999998,46.823,41.335599999999999,39.106299999999997,43.3934,39.963700000000003,34.476399999999998,43.136099999999999,35.419499999999999,47.2517,40.220999999999997,35.505299999999998,43.907800000000002,40.9069,41.592799999999997,33.619,44.336500000000001,35.676699999999997,31.389700000000001,39.106299999999997,44.250799999999998,37.563000000000002,47.423099999999998,38.420400000000001,44.593699999999998,43.050400000000003],"lon":[-86.750900000000001,-127.25,-111.625,-92.299199999999999,-119.773,-105.51300000000001,-72.357299999999995,-74.984099999999998,null,-81.685000000000002,-83.373599999999996,-126.25,-113.93000000000001,-89.377600000000001,-86.080799999999996,-93.371399999999994,-98.115600000000001,-84.767399999999995,-92.272400000000005,-68.980099999999993,-76.645899999999997,-71.579999999999998,-84.686999999999998,-94.604299999999995,-89.8065,-92.5137,-109.31999999999999,-99.589799999999997,-116.851,-71.392399999999995,-74.233599999999996,-105.94199999999999,-75.144900000000007,-78.468599999999995,-1
00.099,-82.596299999999999,-97.123900000000006,-120.068,-77.450000000000003,-71.124399999999994,-80.505600000000001,-99.723799999999997,-86.456000000000003,-98.785700000000006,-111.33,-72.545000000000002,-78.200500000000005,-119.746,-80.666499999999999,-89.994100000000003,-107.256],"text":["32%","11%","15%","37%","7%","14%","9%","19%","7%","14%","30%","9%","16%","11%","27%","17%","24%","22%","27%","16%","11%","8%","16%","15%","42%","21%","13%","12%","7%","21%","6%","24%","4%","24%","9%","18%","21%","13%","9%","6%","28%","19%","26%","22%","8%","14%","16%","12%","22%","11%","12%"],"mode":"text","textfont":{"color":"black","size":9},"showlegend":false,"marker":{"color":"rgba(255,127,14,1)","line":{"color":"rgba(255,127,14,1)"}},"line":{"color":"rgba(255,127,14,1)"},"frame":null}],"highlight":{"on":"plotly_click","persistent":false,"dynamic":false,"selectize":false,"opacityDim":0.20000000000000001,"selected":{"opacity":1},"debounce":0},"shinyEvents":["plotly_hover","plotly_click","plotly_selected","plotly_relayout","plotly_brushed","plotly_brushing","plotly_clickannotation","plotly_doubleclick","plotly_deselect","plotly_afterplot","plotly_sunburstclick"],"base_url":"https://plot.ly"},"evals":[],"jsHooks":[]}</script>
{=html} <div class="plotly html-widget html-fill-item" id="htmlwidget-7a2e31321df24aaa6303" style="width:672px;height:480px;"></div> <script type="application/json" data-for="htmlwidget-7a2e31321df24aaa6303">{"x":{"visdat":{"6c2c5e8576b":["function () ","plotlyVisDat"]},"cur_data":"6c2c5e8576b","attrs":{"6c2c5e8576b":{"alpha_stroke":1,"sizes":[10,100],"spans":[1,20],"type":"choropleth","locations":["AL","AK","AZ","AR","CA","CO","CT","DE","FL","GA","HI","ID","IL","IN","IA","KS","KY","LA","ME","MD","MA","MI","MN","MS","MO","MT","NE","NV","NH","NJ","NM","NY","NC","ND","OH","OK","OR","PA","RI","SC","SD","TN","TX","UT","VT","VA","WA","WV","WI","WY"],"locationmode":"USA-states","z":[10.861423220973784,28.91566265060241,28.94736842105263,13.675213675213675,8.0476900149031305,15.09433962264151,0,0,7.2072072072072073,7.6802507836990594,13.725490196078431,10.9375,7.5356415478615073,1.0775862068965518,2.197802197802198,7.3643410852713176,5.9782608695652177,11.821086261980831,5.3061224489795915,1.9138755980861244,0.66225165562913912,3.4712950600801067,13.817330210772834,20.90909090909091,11.304347826086957,13.664596273291925,6.5934065934065931,11.267605633802816,3.6231884057971016,0.66666666666666663,27.922077922077921,3.4985422740524781,3.7575757575757578,7.6271186440677967,2.2727272727272729,10.21505376344086,10.91703056768559,1.8666666666666667,0,8.0275229357798157,10.852713178294573,5.025125628140704,9.0192644483362514,12.64367816091954,3.7593984962406015,10.638297872340425,12.037037037037036,6.7415730337078648,6.021505376344086,6.4516129032258061],"colorscale":"Greens","colorbar":{"title":"Rural Food Desert %"},"zmin":0,"zmax":42.249240121580549,"text":["Alabama <br> Rural Desert %: 10.9 %","Alaska <br> Rural Desert %: 28.9 %","Arizona <br> Rural Desert %: 28.9 %","Arkansas <br> Rural Desert %: 13.7 %","California <br> Rural Desert %: 8 %","Colorado <br> Rural Desert %: 15.1 %","Connecticut <br> Rural Desert %: 0 %","Delaware <br> Rural Desert %: 0 %","Florida <br> Rural 
Desert %: 7.2 %","Georgia <br> Rural Desert %: 7.7 %","Hawaii <br> Rural Desert %: 13.7 %","Idaho <br> Rural Desert %: 10.9 %","Illinois <br> Rural Desert %: 7.5 %","Indiana <br> Rural Desert %: 1.1 %","Iowa <br> Rural Desert %: 2.2 %","Kansas <br> Rural Desert %: 7.4 %","Kentucky <br> Rural Desert %: 6 %","Louisiana <br> Rural Desert %: 11.8 %","Maine <br> Rural Desert %: 5.3 %","Maryland <br> Rural Desert %: 1.9 %","Massachusetts <br> Rural Desert %: 0.7 %","Michigan <br> Rural Desert %: 3.5 %","Minnesota <br> Rural Desert %: 13.8 %","Mississippi <br> Rural Desert %: 20.9 %","Missouri <br> Rural Desert %: 11.3 %","Montana <br> Rural Desert %: 13.7 %","Nebraska <br> Rural Desert %: 6.6 %","Nevada <br> Rural Desert %: 11.3 %","New Hampshire <br> Rural Desert %: 3.6 %","New Jersey <br> Rural Desert %: 0.7 %","New Mexico <br> Rural Desert %: 27.9 %","New York <br> Rural Desert %: 3.5 %","North Carolina <br> Rural Desert %: 3.8 %","North Dakota <br> Rural Desert %: 7.6 %","Ohio <br> Rural Desert %: 2.3 %","Oklahoma <br> Rural Desert %: 10.2 %","Oregon <br> Rural Desert %: 10.9 %","Pennsylvania <br> Rural Desert %: 1.9 %","Rhode Island <br> Rural Desert %: 0 %","South Carolina <br> Rural Desert %: 8 %","South Dakota <br> Rural Desert %: 10.9 %","Tennessee <br> Rural Desert %: 5 %","Texas <br> Rural Desert %: 9 %","Utah <br> Rural Desert %: 12.6 %","Vermont <br> Rural Desert %: 3.8 %","Virginia <br> Rural Desert %: 10.6 %","Washington <br> Rural Desert %: 12 %","West Virginia <br> Rural Desert %: 6.7 %","Wisconsin <br> Rural Desert %: 6 %","Wyoming <br> Rural Desert %: 6.5 
%"],"hoverinfo":"text","inherit":true},"6c2c5e8576b.1":{"alpha_stroke":1,"sizes":[10,100],"spans":[1,20],"type":"scattergeo","lat":[32.5901,49.25,34.219200000000001,34.733600000000003,36.534100000000002,38.677700000000002,41.592799999999997,38.677700000000002,27.874400000000001,32.332900000000002,31.75,43.564799999999998,40.049500000000002,40.049500000000002,41.9358,38.420400000000001,37.391500000000001,30.618099999999998,45.622599999999998,39.277799999999999,42.3645,43.136099999999999,46.394300000000001,32.675800000000002,38.334699999999998,46.823,41.335599999999999,39.106299999999997,43.3934,39.963700000000003,34.476399999999998,43.136099999999999,35.419499999999999,47.2517,40.220999999999997,35.505299999999998,43.907800000000002,40.9069,41.592799999999997,33.619,44.336500000000001,35.676699999999997,31.389700000000001,39.106299999999997,44.250799999999998,37.563000000000002,47.423099999999998,38.420400000000001,44.593699999999998,43.050400000000003],"lon":[-86.750900000000001,-127.25,-111.625,-92.299199999999999,-119.773,-105.51300000000001,-72.357299999999995,-74.984099999999998,-81.685000000000002,-83.373599999999996,-126.25,-113.93000000000001,-89.377600000000001,-86.080799999999996,-93.371399999999994,-98.115600000000001,-84.767399999999995,-92.272400000000005,-68.980099999999993,-76.645899999999997,-71.579999999999998,-84.686999999999998,-94.604299999999995,-89.8065,-92.5137,-109.31999999999999,-99.589799999999997,-116.851,-71.392399999999995,-74.233599999999996,-105.94199999999999,-75.144900000000007,-78.468599999999995,-100.099,-82.596299999999999,-97.123900000000006,-120.068,-77.450000000000003,-71.124399999999994,-80.505600000000001,-99.723799999999997,-86.456000000000003,-98.785700000000006,-111.33,-72.545000000000002,-78.200500000000005,-119.746,-80.666499999999999,-89.994100000000003,-107.256],"text":["11%","29%","29%","14%","8%","15%","0%","0%","7%","8%","14%","11%","8%","1%","2%","7%","6%","12%","5%","2%","1%","3%","14%","21%","11%","14%","7%","11%"
,"4%","1%","28%","3%","4%","8%","2%","10%","11%","2%","0%","8%","11%","5%","9%","13%","4%","11%","12%","7%","6%","6%"],"mode":"text","textfont":{"color":"black","size":9},"showlegend":false,"inherit":true}},"layout":{"margin":{"b":40,"l":60,"t":25,"r":10},"title":"Rural Food Desert % by US State","geo":{"scope":"usa","projection":{"type":"albers usa"},"showlakes":true,"lakecolor":"white"},"scene":{"zaxis":{"title":[]}},"hovermode":"closest","showlegend":false,"legend":{"yanchor":"top","y":0.5}},"source":"A","config":{"modeBarButtonsToAdd":["hoverclosest","hovercompare"],"showSendToCloud":false},"data":[{"colorbar":{"title":"Rural Food Desert %","ticklen":2,"len":0.5,"lenmode":"fraction","y":1,"yanchor":"top"},"colorscale":"Greens","showscale":true,"type":"choropleth","locations":["AL","AK","AZ","AR","CA","CO","CT","DE","FL","GA","HI","ID","IL","IN","IA","KS","KY","LA","ME","MD","MA","MI","MN","MS","MO","MT","NE","NV","NH","NJ","NM","NY","NC","ND","OH","OK","OR","PA","RI","SC","SD","TN","TX","UT","VT","VA","WA","WV","WI","WY"],"locationmode":"USA-states","z":[10.861423220973784,28.91566265060241,28.94736842105263,13.675213675213675,8.0476900149031305,15.09433962264151,0,0,7.2072072072072073,7.6802507836990594,13.725490196078431,10.9375,7.5356415478615073,1.0775862068965518,2.197802197802198,7.3643410852713176,5.9782608695652177,11.821086261980831,5.3061224489795915,1.9138755980861244,0.66225165562913912,3.4712950600801067,13.817330210772834,20.90909090909091,11.304347826086957,13.664596273291925,6.5934065934065931,11.267605633802816,3.6231884057971016,0.66666666666666663,27.922077922077921,3.4985422740524781,3.7575757575757578,7.6271186440677967,2.2727272727272729,10.21505376344086,10.91703056768559,1.8666666666666667,0,8.0275229357798157,10.852713178294573,5.025125628140704,9.0192644483362514,12.64367816091954,3.7593984962406015,10.638297872340425,12.037037037037036,6.7415730337078648,6.021505376344086,6.4516129032258061],"zmin":0,"zmax":42.249240121580549,"text":["
Alabama <br> Rural Desert %: 10.9 %","Alaska <br> Rural Desert %: 28.9 %","Arizona <br> Rural Desert %: 28.9 %","Arkansas <br> Rural Desert %: 13.7 %","California <br> Rural Desert %: 8 %","Colorado <br> Rural Desert %: 15.1 %","Connecticut <br> Rural Desert %: 0 %","Delaware <br> Rural Desert %: 0 %","Florida <br> Rural Desert %: 7.2 %","Georgia <br> Rural Desert %: 7.7 %","Hawaii <br> Rural Desert %: 13.7 %","Idaho <br> Rural Desert %: 10.9 %","Illinois <br> Rural Desert %: 7.5 %","Indiana <br> Rural Desert %: 1.1 %","Iowa <br> Rural Desert %: 2.2 %","Kansas <br> Rural Desert %: 7.4 %","Kentucky <br> Rural Desert %: 6 %","Louisiana <br> Rural Desert %: 11.8 %","Maine <br> Rural Desert %: 5.3 %","Maryland <br> Rural Desert %: 1.9 %","Massachusetts <br> Rural Desert %: 0.7 %","Michigan <br> Rural Desert %: 3.5 %","Minnesota <br> Rural Desert %: 13.8 %","Mississippi <br> Rural Desert %: 20.9 %","Missouri <br> Rural Desert %: 11.3 %","Montana <br> Rural Desert %: 13.7 %","Nebraska <br> Rural Desert %: 6.6 %","Nevada <br> Rural Desert %: 11.3 %","New Hampshire <br> Rural Desert %: 3.6 %","New Jersey <br> Rural Desert %: 0.7 %","New Mexico <br> Rural Desert %: 27.9 %","New York <br> Rural Desert %: 3.5 %","North Carolina <br> Rural Desert %: 3.8 %","North Dakota <br> Rural Desert %: 7.6 %","Ohio <br> Rural Desert %: 2.3 %","Oklahoma <br> Rural Desert %: 10.2 %","Oregon <br> Rural Desert %: 10.9 %","Pennsylvania <br> Rural Desert %: 1.9 %","Rhode Island <br> Rural Desert %: 0 %","South Carolina <br> Rural Desert %: 8 %","South Dakota <br> Rural Desert %: 10.9 %","Tennessee <br> Rural Desert %: 5 %","Texas <br> Rural Desert %: 9 %","Utah <br> Rural Desert %: 12.6 %","Vermont <br> Rural Desert %: 3.8 %","Virginia <br> Rural Desert %: 10.6 %","Washington <br> Rural Desert %: 12 %","West Virginia <br> Rural Desert %: 6.7 %","Wisconsin <br> Rural Desert %: 6 %","Wyoming <br> Rural Desert %: 6.5 
%"],"hoverinfo":["text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text","text"],"marker":{"line":{"color":"rgba(31,119,180,1)"}},"frame":null},{"type":"scattergeo","lat":[32.5901,49.25,34.219200000000001,34.733600000000003,36.534100000000002,38.677700000000002,41.592799999999997,38.677700000000002,27.874400000000001,32.332900000000002,31.75,43.564799999999998,40.049500000000002,40.049500000000002,41.9358,38.420400000000001,37.391500000000001,30.618099999999998,45.622599999999998,39.277799999999999,42.3645,43.136099999999999,46.394300000000001,32.675800000000002,38.334699999999998,46.823,41.335599999999999,39.106299999999997,43.3934,39.963700000000003,34.476399999999998,43.136099999999999,35.419499999999999,47.2517,40.220999999999997,35.505299999999998,43.907800000000002,40.9069,41.592799999999997,33.619,44.336500000000001,35.676699999999997,31.389700000000001,39.106299999999997,44.250799999999998,37.563000000000002,47.423099999999998,38.420400000000001,44.593699999999998,43.050400000000003],"lon":[-86.750900000000001,-127.25,-111.625,-92.299199999999999,-119.773,-105.51300000000001,-72.357299999999995,-74.984099999999998,-81.685000000000002,-83.373599999999996,-126.25,-113.93000000000001,-89.377600000000001,-86.080799999999996,-93.371399999999994,-98.115600000000001,-84.767399999999995,-92.272400000000005,-68.980099999999993,-76.645899999999997,-71.579999999999998,-84.686999999999998,-94.604299999999995,-89.8065,-92.5137,-109.31999999999999,-99.589799999999997,-116.851,-71.392399999999995,-74.233599999999996,-105.94199999999999,-75.144900000000007,-78.468599999999995,-100.099,-82.596299999999999,-97.123900000000006,-120.068,-77.450000000000003,-71.124399999999994,-80.505600000000001,
-99.723799999999997,-86.456000000000003,-98.785700000000006,-111.33,-72.545000000000002,-78.200500000000005,-119.746,-80.666499999999999,-89.994100000000003,-107.256],"text":["11%","29%","29%","14%","8%","15%","0%","0%","7%","8%","14%","11%","8%","1%","2%","7%","6%","12%","5%","2%","1%","3%","14%","21%","11%","14%","7%","11%","4%","1%","28%","3%","4%","8%","2%","10%","11%","2%","0%","8%","11%","5%","9%","13%","4%","11%","12%","7%","6%","6%"],"mode":"text","textfont":{"color":"black","size":9},"showlegend":false,"marker":{"color":"rgba(255,127,14,1)","line":{"color":"rgba(255,127,14,1)"}},"line":{"color":"rgba(255,127,14,1)"},"frame":null}],"highlight":{"on":"plotly_click","persistent":false,"dynamic":false,"selectize":false,"opacityDim":0.20000000000000001,"selected":{"opacity":1},"debounce":0},"shinyEvents":["plotly_hover","plotly_click","plotly_selected","plotly_relayout","plotly_brushed","plotly_brushing","plotly_clickannotation","plotly_doubleclick","plotly_deselect","plotly_afterplot","plotly_sunburstclick"],"base_url":"https://plot.ly"},"evals":[],"jsHooks":[]}</script>
``` r
# Share of tracts flagged as food deserts within each State x Urban group.
state_summary <- df %>%
  group_by(State, Urban) %>%
  summarise(
    TotalFoodDeserts = sum(FoodDesert, na.rm = TRUE),
    TotalTracts = n(),
    FoodDesertPercentage = 100 * TotalFoodDeserts / TotalTracts,
    .groups = "drop"
  )

# Split by the Urban flag and rank states by their food desert share.
urban_summary <- state_summary %>%
  filter(Urban == 1) %>%
  arrange(desc(FoodDesertPercentage))

rural_summary <- state_summary %>%
  filter(Urban == 0) %>%
  arrange(desc(FoodDesertPercentage))

# Horizontal bar charts, one bar per state, ordered by percentage.
urban_plot <- ggplot(
  urban_summary,
  aes(x = reorder(State, FoodDesertPercentage), y = FoodDesertPercentage)
) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(
    title = "Urban Food Desert % by State",
    x = "State",
    y = "Urban Desert %"
  ) +
  theme_minimal(base_size = 12)

rural_plot <- ggplot(
  rural_summary,
  aes(x = reorder(State, FoodDesertPercentage), y = FoodDesertPercentage)
) +
  geom_col(fill = "seagreen") +
  coord_flip() +
  labs(
    title = "Rural Food Desert % by State",
    x = "State",
    y = "Rural Desert %"
  ) +
  theme_minimal(base_size = 12)

# patchwork: show both charts side by side.
urban_plot + rural_plot + plot_layout(ncol = 2)
```
Texas has by far the most food desert tracts, but it also has a large number of tracts overall. Mississippi has the highest percentage of food desert tracts, with an astonishing 32% of its tracts being food deserts. Mississippi also has the highest percentage of food desert tracts in urban areas, where 42% of its urban tracts are food deserts. Arizona and Alaska share the highest percentage of food desert tracts in rural areas, at about 29% each.

SMART Q2

What demographics and socioeconomic factors are linked to food deserts?

Data Cleaning and imputation of the combined data source

# Tract-level atlas. CensusTract is read as text because it is zero-padded
# to 11 characters further down.
foodatlas <- read_csv(
  file = "../dataset/FoodAccessResearchAtlasData2019.csv",
  col_types = cols(CensusTract = col_character())
)

# Supplementary FE_* tables, read with readr's default column-type guessing.
socioeconomic <- read_csv(file = "../dataset/FE_socioeconomic.csv")
insecurity    <- read_csv(file = "../dataset/FE_insecurity.csv")
health        <- read_csv(file = "../dataset/FE_health.csv")
stores        <- read_csv(file = "../dataset/FE_stores.csv")
restaurants   <- read_csv(file = "../dataset/FE_restaurants.csv")
taxes         <- read_csv(file = "../dataset/FE_taxes.csv")
local         <- read_csv(file = "../dataset/FE_local.csv")
access        <- read_csv(file = "../dataset/FE_access.csv")
state_data    <- read_csv(file = "../dataset/FE_supplemental_data_state.csv")
county_data   <- read_csv(file = "../dataset/FE_supplemental_data_county.csv")


# Replace the literal string "NULL" with NA in every character column of `df`.
# Non-character columns are passed through unchanged.
clean_nulls <- function(df) {
  null_to_na <- function(col) na_if(col, "NULL")
  mutate(df, across(where(is.character), null_to_na))
}

# Turn "NULL" placeholders into NA in every loaded table.
foodatlas     <- foodatlas %>% clean_nulls()
socioeconomic <- socioeconomic %>% clean_nulls()
insecurity    <- insecurity %>% clean_nulls()
health        <- health %>% clean_nulls()
stores        <- stores %>% clean_nulls()
restaurants   <- restaurants %>% clean_nulls()
taxes         <- taxes %>% clean_nulls()
local         <- local %>% clean_nulls()
access        <- access %>% clean_nulls()
state_data    <- state_data %>% clean_nulls()
county_data   <- county_data %>% clean_nulls()


# Add a CountyFIPS join key: the table's FIPS column left-padded with "0"
# to a width of 5 characters.
add_county_fips <- function(tbl) {
  mutate(tbl, CountyFIPS = str_pad(FIPS, 5, pad = "0"))
}

# Tract IDs are padded to 11 characters; their first 5 characters are used
# as the county key.
foodatlas <- foodatlas %>%
  mutate(
    CensusTract = str_pad(CensusTract, 11, pad = "0"),
    CountyFIPS = substr(CensusTract, 1, 5)
  )

socioeconomic <- add_county_fips(socioeconomic)
insecurity    <- add_county_fips(insecurity)
health        <- add_county_fips(health)
stores        <- add_county_fips(stores)
restaurants   <- add_county_fips(restaurants)
taxes         <- add_county_fips(taxes)
local         <- add_county_fips(local)
access        <- add_county_fips(access)
county_data   <- add_county_fips(county_data)

# Merge everything into foodatlas
#
# Every county-level table is attached to the tract rows through CountyFIPS,
# in the same order as before (so duplicated column names receive the same
# .x/.y suffixes). A left join silently duplicates tract rows when a lookup
# table repeats a key, so the row count is checked after each join.
# NOTE(review): columns shared by the county tables (e.g. FIPS) end up in
# merged_df several times with suffixes -- confirm that is intended.
county_tables <- list(
  socioeconomic = socioeconomic,
  insecurity = insecurity,
  health = health,
  stores = stores,
  restaurants = restaurants,
  taxes = taxes,
  local = local,
  access = access,
  county_data = county_data
)

merged_df <- foodatlas
for (table_name in names(county_tables)) {
  joined <- left_join(merged_df, county_tables[[table_name]], by = "CountyFIPS")
  if (nrow(joined) != nrow(merged_df)) {
    stop(
      "Joining `", table_name, "` changed the number of tract rows from ",
      nrow(merged_df), " to ", nrow(joined),
      "; CountyFIPS is not unique in that table.",
      call. = FALSE
    )
  }
  merged_df <- joined
}
rm(county_tables, joined, table_name)


# Binary outcome, taken from the LILATracts_1And10 flag.
merged_df$FoodDesert <- as.numeric(as.character(merged_df$LILATracts_1And10))

# Candidate predictors: every numeric column except the outcome itself.
# NOTE(review): LILATracts_1And10 (the source of the outcome) and any numeric
# identifier columns are still included at this point -- the former is
# excluded again before the feature screening; confirm the latter.
predictor_vars <- merged_df %>%
  select(-FoodDesert) %>%
  select(where(is.numeric))

# Drop columns with >10% missing
missing_pct <- colMeans(is.na(predictor_vars))
predictor_vars <- predictor_vars[, missing_pct <= 0.10]

# Impute remaining NA with medians
predictor_vars <- predictor_vars %>%
  mutate(across(everything(), ~ ifelse(is.na(.), median(., na.rm = TRUE), .)))

# TRUE when the non-missing values of `x` are exactly the two values 0 and 1.
is_binary <- function(x) {
  unique_vals <- unique(na.omit(x))
  length(unique_vals) == 2 && all(unique_vals %in% c(0, 1))
}

binary_cols <- names(predictor_vars)[vapply(predictor_vars, is_binary, logical(1))]
continuous_cols <- setdiff(names(predictor_vars), binary_cols)

# A constant column has zero variance: scaling it yields NaN for every row and
# drop_na() below would then discard the whole data set. Such columns carry
# no information, so they are removed before scaling.
constant_cols <- continuous_cols[
  vapply(
    predictor_vars[continuous_cols],
    function(x) length(unique(x)) < 2,
    logical(1)
  )
]
predictor_vars <- predictor_vars %>%
  select(-all_of(constant_cols))
continuous_cols <- setdiff(continuous_cols, constant_cols)

# Convert binary variables to factor
predictor_vars <- predictor_vars %>%
  mutate(across(all_of(binary_cols), as.factor))

# Scale continuous variables
# scale() returns a one-column matrix; as.numeric() keeps the columns as
# plain numeric vectors.
predictor_vars <- predictor_vars %>%
  mutate(across(all_of(continuous_cols), ~ as.numeric(scale(.))))

# Prepare modeling data
model_data <- bind_cols(
  FoodDesert = merged_df$FoodDesert,
  predictor_vars
) %>% drop_na()

# Reproducible 30% sample used for the feature-selection steps.
set.seed(97)
model_data_sampled <- model_data %>% sample_frac(0.3)

str(model_data_sampled)

Iterative feature selection process starts here

# Numeric candidates for screening. The outcome, the LILATracts_* flags and
# `2010_Census_Population` are excluded explicitly.
predictor_vars <- model_data_sampled %>%
  select(-c(FoodDesert, LILATracts_1And10, LILATracts_1And20, LILATracts_halfAnd10, `2010_Census_Population`)) %>%
  select(where(is.numeric))

y <- model_data_sampled$FoodDesert

# One single-predictor logistic regression per candidate. The predictor name
# is wrapped in backticks (as in the other formula builders in this file) so
# that non-syntactic column names do not break formula parsing.
univariate_results <- lapply(names(predictor_vars), function(var) {
  temp_formula <- as.formula(paste0("FoodDesert ~ `", var, "`"))
  model <- glm(temp_formula, data = model_data_sampled, family = binomial())
  tidy(model) %>%
    filter(term != "(Intercept)") %>%
    mutate(variable = var)
})

univariate_df <- bind_rows(univariate_results)

# Keep predictors whose univariate coefficient has p < 0.05.
selected_vars_univariate <- univariate_df %>%
  filter(p.value < 0.05) %>%
  pull(variable)

cat("Variables selected after univariate screening:", length(selected_vars_univariate), "\n")
print(selected_vars_univariate)
# Design matrix of the univariately selected predictors for glmnet.
stopifnot(length(selected_vars_univariate) > 0)
X_lasso <- model.matrix(
  as.formula(paste("~", paste0("`", selected_vars_univariate, "`", collapse = " + "))),
  data = model_data_sampled
)[, -1, drop = FALSE]  # remove intercept; drop = FALSE keeps a matrix

y_lasso <- model_data_sampled$FoodDesert

# Lasso logistic regression
set.seed(72)
lasso_fit <- cv.glmnet(X_lasso, y_lasso, family = "binomial", alpha = 1)

# Penalty with the lowest cross-validated error.
best_lambda <- lasso_fit$lambda.min

# Predictors with a non-zero coefficient at that penalty.
lasso_coefs <- coef(lasso_fit, s = best_lambda)
selected_lasso_vars <- rownames(lasso_coefs)[lasso_coefs[, 1] != 0]
selected_lasso_vars <- selected_lasso_vars[selected_lasso_vars != "(Intercept)"]
# Design-matrix names of non-syntactic columns carry backticks; strip them so
# the names match the data-frame columns again.
selected_lasso_vars <- gsub("`", "", selected_lasso_vars, fixed = TRUE)

cat("Variables selected after Lasso:", length(selected_lasso_vars), "\n")
print(selected_lasso_vars)
# Iteratively drop the predictor with the largest variance inflation factor.
#
# A logistic regression of `target` on all remaining predictors is refitted
# in every round; the predictor with the highest VIF is removed until every
# VIF is below `threshold` or fewer than two predictors remain.
#
# data      : data frame holding `target` and the candidate predictors.
# target    : name of the outcome column.
# threshold : VIF value at or above which a predictor is removed.
#
# Returns a character vector with the names of the retained predictors.
remove_high_vif_iteratively <- function(data, target = "FoodDesert", threshold = 5) {
  current_vars <- setdiff(names(data), target)

  # vif() needs at least two model terms, so stop once one predictor is left.
  while (length(current_vars) >= 2) {
    temp_formula <- as.formula(
      paste(target, "~", paste0("`", current_vars, "`", collapse = " + "))
    )
    temp_model <- glm(temp_formula, data = data, family = binomial())

    vif_values <- vif(temp_model)
    if (is.matrix(vif_values)) {
      # With multi-df terms vif() returns a table (GVIF, Df, GVIF^(1/(2*Df))).
      # The squared last column is used so the values are on the VIF scale.
      # NOTE(review): confirm this is the desired rule for factor predictors.
      vif_values <- vif_values[, ncol(vif_values)]^2
    }

    worst_idx <- which.max(vif_values)  # ignores NA values
    if (length(worst_idx) == 0 || vif_values[[worst_idx]] < threshold) {
      break
    }

    # Term labels of non-syntactic names are backtick-quoted; strip the quotes
    # so the label matches the column name again.
    worst_var <- gsub("`", "", names(vif_values)[worst_idx], fixed = TRUE)
    if (!worst_var %in% current_vars) {
      stop(
        "Cannot map model term '", worst_var, "' back to a predictor column.",
        call. = FALSE
      )
    }

    cat("Removing variable:", worst_var, "VIF =", round(vif_values[[worst_idx]], 2), "\n")

    current_vars <- setdiff(current_vars, worst_var)
  }

  current_vars
}

# Restrict the sample to the Lasso-selected predictors plus the outcome and
# keep complete rows only.
model_data_reduced <- drop_na(
  select(model_data_sampled, all_of(c(selected_lasso_vars, "FoodDesert")))
)

# Run VIF-based iterative filtering
final_selected_vars <- remove_high_vif_iteratively(
  data = model_data_reduced,
  target = "FoodDesert",
  threshold = 5
)

cat("Final number of predictors:", length(final_selected_vars), "\n")
print(final_selected_vars)
# Backward elimination on coefficient p-values.
#
# A logistic regression of `target` on all remaining predictors is refitted
# in every round. The predictor owning the coefficient with the largest
# p-value is removed, one predictor per round, until every p-value is below
# `threshold` or no predictor is left.
#
# data      : data frame holding `target` and the candidate predictors.
# target    : name of the outcome column.
# threshold : p-value at or above which a predictor is removed.
#
# Returns a character vector with the names of the retained predictors
# (possibly empty).
remove_high_pvalue_iteratively <- function(data, target = "FoodDesert", threshold = 0.05) {
  current_vars <- setdiff(names(data), target)

  while (length(current_vars) > 0) {
    temp_formula <- as.formula(
      paste(target, "~", paste0("`", current_vars, "`", collapse = " + "))
    )
    temp_model <- glm(temp_formula, data = data, family = binomial())

    # Coefficient table without the intercept; column 4 holds the p-values.
    coef_table <- summary(temp_model)$coefficients
    coef_table <- coef_table[rownames(coef_table) != "(Intercept)", , drop = FALSE]
    p_values <- setNames(coef_table[, 4], rownames(coef_table))

    if (length(p_values) == 0 || all(is.na(p_values))) {
      break
    }

    worst_idx <- which.max(p_values)  # first maximum, ignores NA
    max_pval <- p_values[[worst_idx]]
    if (max_pval < threshold) {
      break
    }

    # Map the coefficient back to the model term that produced it. Factor
    # predictors yield coefficient names such as "g1" that differ from the
    # column name, so the design matrix's "assign" attribute is used.
    # NOTE(review): a factor is dropped as a whole when one of its level
    # coefficients has the largest p-value.
    design <- model.matrix(temp_model)
    term_index <- attr(design, "assign")
    is_term <- term_index > 0
    term_labels <- attr(terms(temp_model), "term.labels")
    coef_owner <- setNames(term_labels[term_index[is_term]], colnames(design)[is_term])

    worst_var <- gsub("`", "", coef_owner[[names(p_values)[worst_idx]]], fixed = TRUE)  # remove backticks
    if (!worst_var %in% current_vars) {
      stop(
        "Cannot map model term '", worst_var, "' back to a predictor column.",
        call. = FALSE
      )
    }

    cat("Removing variable:", worst_var, "p-value =", round(max_pval, 4), "\n")

    current_vars <- setdiff(current_vars, worst_var)
  }

  current_vars
}


# Rebuild the reduced data set from the VIF-filtered predictors plus the
# outcome, complete rows only.
model_data_reduced <- drop_na(
  select(model_data_sampled, all_of(c(final_selected_vars, "FoodDesert")))
)

# Backward elimination with a 0.01 p-value cut-off.
final_significant_vars <- remove_high_pvalue_iteratively(
  data = model_data_reduced,
  target = "FoodDesert",
  threshold = 0.01
)

cat("Final number of predictors after p-value filtering:", length(final_significant_vars), "\n")
print(final_significant_vars)

# Final logistic regression on the surviving predictors.
final_formula <- as.formula(
  paste("FoodDesert ~", paste0("`", final_significant_vars, "`", collapse = " + "))
)
final_model <- glm(
  formula = final_formula,
  family = binomial(),
  data = model_data_reduced
)

Final logit-model summary

## 
## Call:
## glm(formula = final_formula, family = binomial(), data = model_data_reduced)
## 
## Coefficients:
##                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)             -2.4968     0.0314  -79.56  < 2e-16 ***
## NUMGQTRS                 0.1605     0.0196    8.17  3.2e-16 ***
## PovertyRate              0.4851     0.0289   16.79  < 2e-16 ***
## TractKids                0.2417     0.0379    6.38  1.8e-10 ***
## TractSeniors             0.2166     0.0308    7.03  2.1e-12 ***
## TractWhite              -0.5274     0.0409  -12.91  < 2e-16 ***
## TractAsian              -0.3717     0.0528   -7.04  1.9e-12 ***
## TractAIAN                0.0965     0.0296    3.26  0.00113 ** 
## TractSNAP                0.3322     0.0301   11.03  < 2e-16 ***
## PCT_NHNA10              -0.1189     0.0312   -3.81  0.00014 ***
## POVRATE15               -0.2068     0.0312   -6.63  3.4e-11 ***
## CH_FOODINSEC_14_17      -0.1470     0.0326   -4.51  6.4e-06 ***
## PCT_OBESE_ADULTS12      -0.0951     0.0270   -3.52  0.00043 ***
## GROCPTH11               -0.1915     0.0374   -5.12  3.1e-07 ***
## SUPERCPTH16              0.1005     0.0224    4.48  7.3e-06 ***
## CONVSPTH16               0.0886     0.0291    3.04  0.00234 ** 
## SPECSPTH11              -0.0944     0.0339   -2.78  0.00541 ** 
## FSRPTH11                 0.0764     0.0292    2.62  0.00886 ** 
## PCT_LOCLFARM07          -0.1869     0.0522   -3.58  0.00035 ***
## PCT_LOCLSALE12          -0.0917     0.0338   -2.71  0.00674 ** 
## PC_DIRSALES07            0.0726     0.0222    3.27  0.00106 ** 
## FMRKT13                 -0.1886     0.0441   -4.28  1.9e-05 ***
## PCT_FMRKT_SNAP18         0.0788     0.0274    2.88  0.00400 ** 
## PCT_FMRKT_CREDIT18      -0.0816     0.0264   -3.09  0.00199 ** 
## PCT_LACCESS_LOWI10       0.2188     0.0346    6.32  2.6e-10 ***
## PCH_LACCESS_HHNV_10_15   0.3866     0.1112    3.48  0.00051 ***
## PCT_LACCESS_HHNV10       0.1741     0.0313    5.57  2.6e-08 ***
## PCT_LACCESS_SNAP15       0.1822     0.0367    4.97  6.7e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 16498  on 21758  degrees of freedom
## Residual deviance: 13145  on 21731  degrees of freedom
## AIC: 13201
## 
## Number of Fisher Scoring iterations: 6
## [1] "VIF values of the coefficients:"
##          PC_DIRSALES07               NUMGQTRS             TractAsian 
##                   1.16                   1.20                   1.20 
##            SUPERCPTH16 PCH_LACCESS_HHNV_10_15     CH_FOODINSEC_14_17 
##                   1.21                   1.23                   1.27 
##         PCT_LOCLSALE12     PCT_OBESE_ADULTS12                FMRKT13 
##                   1.30                   1.40                   1.52 
##              GROCPTH11             SPECSPTH11         PCT_LOCLFARM07 
##                   1.52                   1.53                   1.55 
##               FSRPTH11       PCT_FMRKT_SNAP18     PCT_FMRKT_CREDIT18 
##                   1.56                   1.69                   1.73 
##             CONVSPTH16           TractSeniors              POVRATE15 
##                   1.81                   1.86                   1.90 
##     PCT_LACCESS_HHNV10            PovertyRate              TractAIAN 
##                   1.94                   2.03                   2.18 
##             PCT_NHNA10              TractSNAP              TractKids 
##                   2.26                   2.34                   2.77 
##     PCT_LACCESS_LOWI10             TractWhite     PCT_LACCESS_SNAP15 
##                   2.85                   3.09                   3.16

Top 5 features by abs correlation value

# Final predictors plus the outcome, coerced column by column to plain numeric.
final_vars_with_target <- c(final_significant_vars, "FoodDesert")

final_data <- select(model_data_reduced, all_of(final_vars_with_target))

final_data_numeric <- mutate(
  final_data,
  across(everything(), function(col) as.numeric(as.character(col)))
)

# Spearman rank correlations over complete observations.
cor_matrix_final <- cor(final_data_numeric, method = "spearman", use = "complete.obs")

# Correlation of every predictor with the outcome, with its absolute value.
cor_with_target <- data.frame(
  Variable = rownames(cor_matrix_final),
  Correlation = cor_matrix_final[, "FoodDesert"]
) %>%
  filter(Variable != "FoodDesert") %>%
  mutate(abs_correlation = abs(Correlation))

# The five strongest predictors by absolute correlation (the object names
# say "top10", but five rows are kept).
top10_features <- cor_with_target %>%
  arrange(desc(abs_correlation)) %>%
  slice_head(n = 5)

top10_var_names <- top10_features$Variable

cor_matrix_top10 <- cor_matrix_final[
  c(top10_var_names, "FoodDesert"),
  c(top10_var_names, "FoodDesert")
]

# Upper-triangle heat map with the coefficients printed in the cells.
corrplot(
  cor_matrix_top10,
  method = "color",
  type = "upper",
  addCoef.col = "black",
  number.cex = 0.8,
  tl.col = "black",
  tl.srt = 45,
  tl.cex = 0.9,
  mar = c(0, 0, 2, 0)
)

# Values from merged_df for the selected variables, with the outcome as a
# factor so it can serve as the grouping axis.
# all_of() makes it explicit that the column names come from an external
# character vector (bare vectors inside select() are deprecated).
boxplot_data <- merged_df %>%
  mutate(FoodDesert = as.factor(FoodDesert)) %>%
  select(all_of(c(top10_var_names, "FoodDesert")))

# One boxplot per selected variable, split by food desert status.
for (var in top10_var_names) {
  p <- ggplot(boxplot_data, aes(x = FoodDesert, y = .data[[var]])) +
    geom_boxplot(fill = "green") +
    labs(
      title = paste("Boxplot of", var, "by Food Desert Status"),
      x = "Food Desert (0 = No, 1 = Yes)",
      y = var
    ) +
    theme_minimal(base_size = 14)

  print(p)
}

It looks like PovertyRate, vehicle access, and SNAP status are the most prominent factors.

# SMART Q3
## How are food deserts related to health issues like obesity and diabetes?
### Select the relevant health variables
``` r
health_vars <- c(
  "PCT_OBESE_ADULTS17",    # % Obese Adults
  "PCT_DIABETES_ADULTS13", # % Adults with Diabetes
  "FoodDesert"
)

# 2. Subset and prepare data
health_data <- merged_df %>%
  select(all_of(health_vars)) %>%
  mutate(FoodDesert = as.factor(FoodDesert))

for (var in health_vars[1:2]) { # Loop over obesity and diabetes
  p <- ggplot(health_data, aes(x = FoodDesert, y = .data[[var]])) +
    geom_boxplot(fill = "tomato") +
    labs(
      title = paste("Boxplot of", var, "by Food Desert Status"),
      x = "Food Desert (0 = No, 1 = Yes)",
      y = var
    ) +
    theme_minimal(base_size = 14)

  print(p)
}
```
``` r
# Coerce every column (including the FoodDesert factor) to plain numeric.
health_data_numeric <- health_data %>%
  mutate(across(everything(), ~ as.numeric(as.character(.))))

# 4. Calculate the correlation matrix
cor_matrix_health <- cor(health_data_numeric, use = "complete.obs", method = "spearman")

corrplot(
  cor_matrix_health,
  method = "color",
  type = "upper",
  addCoef.col = "black",
  tl.col = "black",
  tl.srt = 45
)
```

It looks like there is no significant relationship between food desert areas and health issues like obesity or diabetes. However, there is a strong correlation between obesity and diabetes, and further investigations should be done with other variables to determine their impact.


SMART Q4

Can we develop a predictive model that accurately identifies high-risk areas likely to be or become food deserts based on social and economic indicators?

Logit-model performance metrics

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction     0     1
##          0 18676  2321
##          1   337   425
##                                         
##                Accuracy : 0.878         
##                  95% CI : (0.873, 0.882)
##     No Information Rate : 0.874         
##     P-Value [Acc > NIR] : 0.0366        
##                                         
##                   Kappa : 0.198         
##                                         
##  Mcnemar's Test P-Value : <2e-16        
##                                         
##             Sensitivity : 0.1548        
##             Specificity : 0.9823        
##          Pos Pred Value : 0.5577        
##          Neg Pred Value : 0.8895        
##              Prevalence : 0.1262        
##          Detection Rate : 0.0195        
##    Detection Prevalence : 0.0350        
##       Balanced Accuracy : 0.5685        
##                                         
##        'Positive' Class : 1             
## 
## 
## --- Model Performance Metrics ---
## Accuracy : 0.878
## Sensitivity (Recall): 0.155
## Specificity: 0.982
## Precision : 0.558
## F1 Score : 0.242
# ROC analysis of the logit model's predicted probabilities.
roc_obj <- roc(response = actual_class, predictor = predicted_probs)
auc_value <- auc(roc_obj)

# Plot ROC
plot(
  roc_obj,
  main = "ROC Curve for Food Desert Prediction",
  col = "blue",
  lwd = 2,
  print.auc = TRUE
)

cat("\nAUC:", round(auc_value, 3), "\n")

Random forest performance metrics

## 
## Call:
##  randomForest(formula = FoodDesert ~ ., data = rf_data, ntree = 500,      mtry = floor(sqrt(length(final_selected_vars))), importance = TRUE) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 8
## 
##         OOB estimate of  error rate: 12.3%
## Confusion matrix:
##       0   1 class.error
## 0 18608 405      0.0213
## 1  2264 482      0.8245
# Predictions from the fitted forest (predict() is called without new data):
# probability of the second class and the predicted class label.
rf_probs <- predict(rf_model, type = "prob")[, 2]
rf_predicted_class <- predict(rf_model, type = "response")
rf_actual_class <- rf_data$FoodDesert

# Confusion matrix with class "1" treated as the positive class.
rf_conf_matrix <- confusionMatrix(
  data = rf_predicted_class,
  reference = rf_actual_class,
  positive = "1"
)
print(rf_conf_matrix)

# Headline metrics extracted from the caret result.
rf_accuracy    <- rf_conf_matrix$overall["Accuracy"]
rf_sensitivity <- rf_conf_matrix$byClass["Sensitivity"]
rf_specificity <- rf_conf_matrix$byClass["Specificity"]
rf_precision   <- rf_conf_matrix$byClass["Precision"]
rf_f1          <- rf_conf_matrix$byClass["F1"]

cat("\n--- Random Forest Performance Metrics ---\n")
cat("Accuracy :", round(rf_accuracy, 3), "\n")
cat("Sensitivity (Recall):", round(rf_sensitivity, 3), "\n")
cat("Specificity:", round(rf_specificity, 3), "\n")
cat("Precision :", round(rf_precision, 3), "\n")
cat("F1 Score :", round(rf_f1, 3), "\n")

# ROC Curve and AUC
rf_roc_obj <- roc(response = rf_actual_class, predictor = rf_probs)

plot(
  rf_roc_obj,
  main = "Random Forest ROC Curve",
  col = "darkgreen",
  lwd = 2,
  print.auc = TRUE
)

rf_auc <- auc(rf_roc_obj)

cat("\nAUC:", round(rf_auc, 3), "\n")
# Gini-based importance per variable, expressed as a share of the total.
rf_var_imp <- importance(rf_model)

rf_var_imp_df <- data.frame(
  Variable = rownames(rf_var_imp),
  Importance = rf_var_imp[, "MeanDecreaseGini"]
)

rf_var_imp_df <- rf_var_imp_df %>%
  mutate(ImportancePct = 100 * Importance / sum(Importance)) %>%  # Convert to %
  arrange(desc(ImportancePct))                                    # Sort descending

top10_rf_var_imp_df <- slice_head(rf_var_imp_df, n = 10)

# Horizontal bars with the percentage printed just past the end of each bar;
# the axis is extended by 20% to leave room for the labels.
ggplot(
  top10_rf_var_imp_df,
  aes(x = reorder(Variable, ImportancePct), y = ImportancePct)
) +
  geom_col(fill = "coral") +
  geom_text(
    aes(label = paste0(round(ImportancePct, 1), "%")),
    hjust = -0.1,
    size = 4
  ) +
  coord_flip() +
  labs(
    title = "Top 10 Feature Importances (Random Forest)",
    x = "Variable",
    y = "Importance (%)"
  ) +
  theme_minimal(base_size = 14) +
  ylim(0, max(top10_rf_var_imp_df$ImportancePct) * 1.2)

The random forest performs slightly better, as it captures non-linear relationships among the variables more accurately. As for the features, PovertyRate and TractSNAP stand out, which aligns with the correlation matrix we saw earlier. Further analysis using Shapley values should be conducted to examine the full effect of these variables. Additionally, it is recommended to build multiple models on the entire feature list and then iteratively reduce the features for each model separately, instead of reusing the features filtered by another model. This reduces model bias.